# Goal: Obtain measures of temporal stability and predicitability
# Jennifer Pan and Yiqing Xu


#####################################################
## Temporal Stability and Predictability: Sample 2
#####################################################

rm(list=ls(all=TRUE))

d <- haven::read_dta("data/sample2.dta")
ls()
names(d)
dim(d)
# drop speeders and those who fail the attention check
d2 <- d[which(d$valid2 == 1 & d$wave2 == 1),] 
dim(d2) ## 445

question.list <- list(
  paste0("s1_", 1:14),
  paste0("s2_", 1:14),
  paste0("s3_", 1:14),
  paste0("s4_", 1:7),
  paste0("s5_", 1:7),
  paste0("s6_", 1:7)
)


sign.list <- list(
  c(1, -1, -1, 1, -1, 1, -1, 1, 1, 1, -1, -1, 1, -1), # nationalism * (-1)
  c(-1, -1, 1, 1, -1, -1, 1, 1, -1, 1, -1, -1, 1, 1), # political
  c(-1, 1, -1, -1, 1, 1, 1, -1, 1, -1, 1, -1, 1, -1), # market
  c(1, -1, -1, -1, 1, 1, 1), # traditionalism * (-1)
  c(-1, 1, -1, 1, 1, 1, -1), # social equality
  c(1, -1, 1, -1, -1, 1, -1)  # minority accommodation
)


# this is going to run for hours
library(Amelia)
library(doParallel)
library(foreach)
cl<-makeCluster(16)  # the cluster name should be reserved
registerDoParallel(cl)
nsims <- 100
predict <- c()
w1vars <- c("index_nati","index_poli","index_econ","index_trad","index_equi","index_ethn")
w2vars <- c("index2_nati","index2_poli","index2_econ","index2_trad","index2_equi","index2_ethn")
for (category in 1:6) {
  cat("\ncategory",category,"\n")
  full.index1 <- d2[w1vars[category]]	
  full.index2 <- d2[w2vars[category]]	
  signs <- sign.list[[category]]
  questions <- question.list[[category]]
  nitems <- length(questions)
  mat.corr <- matrix(NA,0,3)
  colnames(mat.corr) <- c("nitems","stab","pred")
  for (i in 1:nitems) {
    cat(i)
    all.comb <- combn(1:nitems, i)
    ncomb <- ncol(all.comb) # number of combinations
    output <- matrix(NA, ncomb, 2)
    output <- foreach (k = 1:ncomb, .combine=rbind,.inorder=FALSE,.packages = c("Amelia")) %dopar% {
      item.no <- all.comb[,k]
      var1 <- paste0("s",category,"_",item.no)
      var2 <- paste0("t",category,"_",item.no)
      if (length(item.no)==1) { ## one item
        index1 <- d2[,var1]
        index2 <- d2[,var2]		
      } else if (length(item.no)==nitems) { ## full length
        index1 <- full.index1
        index2 <- full.index2
      } else {
        capture.output(mout1 <- amelia(d2[,var1], m = nsims,   ords = var1))
        capture.output(mout2 <- amelia(d2[,var2], m = nsims,   ords = var2))				
        newd1 <- as.matrix(Reduce("+",mout1$imputations)/nsims)
        newd2 <- as.matrix(Reduce("+",mout2$imputations)/nsims)
        index1 <- c(newd1%*%signs[item.no])						
        index2 <- c(newd2%*%signs[item.no])							
      }
      out <- c(cor(index1,index2, use = "pairwise.complete.obs"),	## stability 	
               cor(index1,full.index2, use = "pairwise.complete.obs"))		## predictability			
      return(abs(out))
    }
    output <- matrix(output, ncomb, 2)
    mat.corr <- rbind(mat.corr, cbind(rep(i,ncomb),output))	
  }
  mat.corr <- as.data.frame(mat.corr)
  mat.corr$nitems <- as.factor(mat.corr$nitems)
  predict <- c(predict, list(mat.corr))
  save(predict, file = "output/s_temporal.RData")
}